#clean up
rm(list=ls())

#set seed
set.seed(715130425)

#load libraries
library(akima)
library(geoR)
library(scatterplot3d)  
library(ggmap)
library(lattice)
library(foreign)
library(maps)
library(spdep)
#install.packages('/Volumes/rgdal/rgdal_0.9-1.tgz',repos=NULL)
library(rgdal)
#install.packages('/Users/jamie/Downloads/prevR_2.9.tgz',repos=NULL)
library(prevR)
library(car)
library(xtable)
library(maptools)

#options
options(scipen=8)

#Set to working directory. Adjust the command below to reflect YOUR working directory.
#setwd('/Volumes/MONOGAN/gillAreal/ccesCode/')
#setwd('/Users/jamie/Documents/gillAreal/ccesCode/')
#setwd('/Users/jamie/Desktop/nhgis0003_shape/nhgis0003_shapefile_us_tract_1970/')

#load survey data
combined<-read.dta('cces08reformatted.dta')

#load ZIP code data
zip.0<-read.fwf('zcta5.txt', widths=c(2,5,59,9,9,14,14,12,12,10,10),col.names=c('state','zip','zcta','population','housingUnits','landMeters','waterMeters','landMiles','waterMiles','latitude','longitude'))
zip.0$zipLandKM<-zip.0$landMeters/(1000^2)
zip.0$zipPop<-zip.0$population
zip.1<-subset(zip.0, subset=state!='AK'&state!='HI'&state!='PR', select=c(state,zip,latitude,longitude,zipPop,zipLandKM))

#add state fips codes
state.fips<-c(2,1,5,60,4,6,8,9,11,10,12,13,66,15,19,16,17,18,20,21,22,25,24,23,26,27,29,28,30,37,38,31,33,34,35,32,36,39,40,41,42,72,44,45,46,47,48,49,51,78,50,53,55,54,56)
abbr<-c('AK','AL','AR','AS','AZ','CA','CO','CT','DC','DE','FL','GA','GU','HI','IA','ID','IL','IN','KS','KY','LA','MA','MD','ME','MI','MN','MO','MS','MT','NC','ND','NE','NH','NJ','NM','NV','NY','OH','OK','OR','PA','PR','RI','SC','SD','TN','TX','UT','VA','VI','VT','WA','WI','WV','WY')
fips.match<-data.frame(state.fips,abbr)
zip<-merge(x=zip.1,y=fips.match,by.x='state',by.y='abbr',all.x=T,all.y=F)

###EMPIRICAL SEMIVARIOGRAM###
ideol.vario<-variog(coords=cbind(combined$eastings,combined$northings), data=combined$ideology)
ideol.robust<-variog(coords=cbind(combined$eastings,combined$northings), data=combined$ideology, estimator.type="modulus")

#pdf('rawVariogram.pdf')
plot(ideol.vario, xlab="Distance in Kilometers", ylab="Semivariance",ylim=c(600,800))
par(new=T)
plot(ideol.robust, xlab="", ylab="",axes=F,ylim=c(600,800),pch='+', col='blue')
#dev.off()

###CHOOSE A TREND TERM###
#training v. out-of-sample forecast
n<-dim(combined)[1]
ninety<-sample(1:n,round(.9*n))
training<-combined[ninety,]
forecasting<-combined[-ninety,]

#five versions of the trend
none<-lm(ideology~age*educ+as.factor(race)*female+as.factor(inc14)+catholic+mormon+orthodox+jewish+islam+mainline+evangelical+rural+ownership+as.factor(empstat),data=training);summary(none)#adj r^2=.1308
ssr<-sum((predict(none,forecasting)-mean(predict(none,forecasting)))^2)
sst<-sum((forecasting$ideology-mean(forecasting$ideology))^2)
ssr/sst #.1395993
(19389^(33/19389))*(sst-ssr)/19389 #65.9077

linear<-lm(ideology~age*educ+as.factor(race)*female+as.factor(inc14)+catholic+mormon+orthodox+jewish+islam+mainline+evangelical+rural+ownership+as.factor(empstat)+eastings+northings,data=training);summary(linear)#adj r^2=.1365
ssr.1<-sum((predict(linear,forecasting)-mean(predict(linear,forecasting)))^2)
ssr.1/sst #.1469531
(19389^(35/19389))*(sst-ssr.1)/19389 #65.41097

quadratic<-lm(ideology~age*educ+as.factor(race)*female+as.factor(inc14)+catholic+mormon+orthodox+jewish+islam+mainline+evangelical+rural+ownership+as.factor(empstat)+eastings*northings+I(eastings^2)+I(northings^2),data=training);summary(quadratic)#adj r^2=.1362
ssr.2<-sum((predict(quadratic,forecasting)-mean(predict(quadratic,forecasting)))^2)
ssr.2/sst #.1488206
19389^(38/19389)*(sst-ssr.2)/19389 #65.36754

cubic<-lm(ideology~age*educ+as.factor(race)*female+as.factor(inc14)+catholic+mormon+orthodox+jewish+islam+mainline+evangelical+rural+ownership+as.factor(empstat)+eastings*northings+I(eastings^2)+I(northings^2)+I(eastings^2)*northings+eastings*I(northings^2)+I(eastings^3)+I(northings^3),data=training);summary(cubic)#adj r^2=.1385
ssr.3<-sum((predict(cubic,forecasting)-mean(predict(cubic,forecasting)))^2)
ssr.3/sst #.1501831
19389^(42/19389)*(sst-ssr.3)/19389 #65.39596

quartic<-lm(ideology~age*educ+as.factor(race)*female+as.factor(inc14)+catholic+mormon+orthodox+jewish+islam+mainline+evangelical+rural+ownership+as.factor(empstat)+eastings*northings+I(eastings^2)*I(northings^2)+I(eastings^2)*northings+eastings*I(northings^2)+I(eastings^3)+I(northings^3)+northings*I(eastings^3)+eastings*I(northings^3)+I(eastings^4)+I(northings^4),data=training);summary(quartic)#adj r^2=.1388
ssr.4<-sum((predict(quartic,forecasting)-mean(predict(quartic,forecasting)))^2)
ssr.4/sst #.1504514
19389^(47/19389)*(sst-ssr.4)/19389 #65.54197

###QUADRATIC AS THE MODEL; CUBIC AND NO TREND AS COMPARISONS###
ideol.ols<-lm(ideology~age*educ+as.factor(race)*female+as.factor(inc14)+catholic+mormon+orthodox+jewish+islam+mainline+evangelical+rural+ownership+as.factor(empstat)+eastings*northings+I(eastings^2)+I(northings^2),data=combined);summary(ideol.ols)
se<-sqrt(diag(summary(ideol.ols)$cov.unscaled))*summary(ideol.ols)$sigma
xtable(cbind(ideol.ols$coefficients,se,confint(ideol.ols,level=.9)),digits=4)
xtable(cbind(ideol.ols$coefficients,se,confint(ideol.ols,level=.9)),digits=10)
combined$ols.resid<-ideol.ols$residuals

ideol.ols.cub<-lm(ideology~age*educ+as.factor(race)*female+as.factor(inc14)+catholic+mormon+orthodox+jewish+islam+mainline+evangelical+rural+ownership+as.factor(empstat)+eastings*northings+I(eastings^2)+I(northings^2)+I(eastings^2)*northings+eastings*I(northings^2)+I(eastings^3)+I(northings^3),data=combined);summary(ideol.ols.cub)
se<-sqrt(diag(summary(ideol.ols.cub)$cov.unscaled))*summary(ideol.ols.cub)$sigma
xtable(cbind(ideol.ols.cub$coefficients,se,confint(ideol.ols.cub,level=.9)),digits=8)

ideol.ols.notrend<-lm(ideology~age*educ+as.factor(race)*female+as.factor(inc14)+catholic+mormon+orthodox+jewish+islam+mainline+evangelical+rural+ownership+as.factor(empstat),data=combined);summary(ideol.ols.notrend)

###RESIDUAL SEMIVARIOGRAM###
resid.robust<-variog(coords=cbind(combined$eastings,combined$northings), data=combined$ols.resid, estimator.type="modulus")

###A FEW VARIOGRAM MODELS USING WEIGHTED LEAST SQAURES###
wave.fit <- variofit(resid.robust, ini.cov.pars = c(100,3000), cov.model="wave", fix.nugget=FALSE, nugget=700)
wave.fit
21543^(3/21543)* 5638303743/21543 #262087.1

exponential.fit <- variofit(resid.robust, ini.cov.pars = c(100,3000), cov.model="exponential", fix.nugget=FALSE, nugget=700)
exponential.fit
21543^(3/21543)* 5830434615/21543 #271018

matern.fit <- variofit(resid.robust, ini.cov.pars = c(100,3000), cov.model="matern", fix.nugget=FALSE, nugget=700, kappa=1)
matern.fit
21543^(3/21543)* 5652415210/21543 #262743.1

gaussian.fit <- variofit(resid.robust, ini.cov.pars = c(100,3000), cov.model="gaussian", fix.nugget=FALSE, nugget=700)
gaussian.fit
21543^(3/21543)* 5637272224/21543 #262039.2
#      tausq     sigmasq         phi 
#   623.9575   3610.8082 133819.0708 

#graphical output of residual variogram model
#pdf('olsGaussian1.pdf',width=5,height=5)
plot(ideol.robust, xlab="Distance in Kilometers", ylab="Semivariance",ylim=c(600,725))
par(new=T)
plot(resid.robust, xlab="", ylab="",axes=F,ylim=c(600,725),pch='+', col='blue')
lines(gaussian.fit, col='blue')
#dev.off()

#pdf('olsGaussian2.pdf',width=5,height=5)
plot(resid.robust, xlab="Distance in Kilometers", ylab="Semivariance",ylim=c(610,640),pch='+', col='blue')
lines(gaussian.fit, col='blue')
#dev.off()

###LOAD THE 2000 CENSUS DATA###
#census<-read.dta('census2000.dta')
#obs<-table(census$statefip); obs

### LOAD 2000 CENSUS DATA: NHGIS ZIP code data ###
nhgis.A<-read.csv('nhgis0005_ds146_2000_zcta.csv')
nhgis.A.1<-nhgis.A[,-c(1:37,39)]
no.three<-apply(nhgis.A.1[,3:324],1,nnzero)
nhgis.A.1<-cbind(nhgis.A.1,no.three)
nhgis.B<-read.csv('nhgis0005_ds151_2000_zcta.csv')
no.home<-nhgis.B$F9P001+nhgis.B$F9P002
nhgis.B<-cbind(nhgis.B,no.home)
nzero<-apply(nhgis.B[,77:92],1,nnzero)
nhgis.B<-cbind(nhgis.B,nzero)
no.women.educ<-apply(nhgis.B[,57:72],1,nnzero)
nhgis.B<-cbind(nhgis.B,no.women.educ)
no.men.educ<-apply(nhgis.B[,41:56],1,nnzero)
nhgis.B<-cbind(nhgis.B,no.men.educ)
full.three<-apply(nhgis.A.1[,3:324],2,sum)
full.inc<-apply(nhgis.B[,77:92],2,sum)
full.educ.women<-apply(nhgis.B[,57:72],2,sum)
full.educ.men<-apply(nhgis.B[,41:56],2,sum)
#table(zip$zip%in%nhgis.A.1$ZCTAA)

#reformat because zip.list loses leading zeroes
HH<-grep(pattern="HH",x=nhgis.A.1$ZCTAA)
XX<-grep(pattern="XX",x=nhgis.A.1$ZCTAA)
nhgis.A.1<-nhgis.A.1[-c(HH,XX),]
nhgis.A.1$ZCTAA<-as.numeric(as.character(nhgis.A.1$ZCTAA))

HH<-grep(pattern="HH",x=nhgis.B$ZCTAA)
XX<-grep(pattern="XX",x=nhgis.B$ZCTAA)
nhgis.B<-nhgis.B[-c(HH,XX),]
nhgis.B$ZCTAA<-as.numeric(as.character(nhgis.B$ZCTAA))

###LOAD COUNTY-BASED DATA###
#USDA urban-rural continuum codes by county
#http://www.ers.usda.gov/data-products/rural-urban-continuum-codes.aspx#.UfCK11OE4xc
usda.0<-read.csv('ruralurbancodes2003b.csv')
#subtract 1 to have a 0 origin
usda.0$rural<-usda.0$X2003.Rural.urban.Continuum.Code-1
#create population variable for rejection sampler
usda.0$X2000.Population[is.na(usda.0$X2000.Population)]<-0
state.maxes<-as.matrix(by(usda.0$X2000.Population,usda.0$State,max))
usda.0$pop2000<-usda.0$X2000.Population/state.maxes[usda.0$State]

#subsetting
usda.1<-subset(usda.0, select=c(FIPS.Code,State,County.Name,rural,pop2000))
usda.1$County.Name<-tolower(usda.1$County.Name)
usda.1$County.Name<-sub(" county", "", usda.1$County.Name)
usda.1$County.Name<-sub(" parish", "", usda.1$County.Name)
usda.1$County.Name<-sub("\\.", "", usda.1$County.Name)
usda.1$County.Name[usda.1$County.Name=="dekalb"]<-"de kalb" 
usda.1$County.Name[usda.1$County.Name=="lamoure"]<-"la moure"
usda.1$County.Name[usda.1$County.Name=="laporte"]<-"la porte"
usda.1$County.Name[usda.1$County.Name=="dupage"]<-"du page"
usda.1$County.Name[usda.1$County.Name=="desoto"]<-"de soto"
usda.1$County.Name[usda.1$County.Name=="dewitt"]<-"de witt"
usda.1$County.Name[!usda.1$County.Name%in%c("baltimore city","st louis city","carson city","charles city","james city")]<-sub(" city","",usda.1$County.Name[!usda.1$County.Name%in%c("baltimore city","st louis city","carson city","charles city","james city")])
usda<-subset(usda.1,State!="AK" & State !="HI")

#ARDA religious census
#http://www.thearda.com/Archive/Files/Descriptions/RCMSCY.asp
#Mainline: http://www.thearda.com/rcms2010/mainline.asp
#Evangelical: http://www.thearda.com/rcms2010/evangelical.asp
arda.0<-read.dta('arda2000.DTA')
arda.0$county<-tolower(arda.0$county)
arda.1<-subset(arda.0,select=c(mainrt,evanrt,cathrt,orthrt,jewrt,islamrt,ldsrt,totrt,fip))
arda.1$other<-1000-arda.1$mainrt-arda.1$evanrt-arda.1$cathrt-arda.1$orthrt-arda.1$jewrt-arda.1$islamrt-arda.1$ldsrt
arda.1$other[arda.1$other<0]<-0
ardaUSDA<-merge(x=arda.1,y=usda,by.x='fip',by.y='FIPS.Code')
ardaUSDA[3074,]<-c(48301,0,73,0,0,0,0,0,73,0,'TX','loving',8,.00000001)

###GENERATE LOCATIONS AND DRAW FROM CENSUS DATA FOR EACH###
##set up nationwide county map to link kriged points with
us.counties<-map("county",fill=FALSE, plot=TRUE, resolution=0)
IDs.2 <- us.counties$names
us.counties.1<-map(database="county",region=IDs.2, fill=TRUE, plot=TRUE, interior=TRUE, exact=TRUE, resolution=0)
a.2<-map2SpatialPolygons(us.counties.1, IDs=IDs.2, proj4string=CRS("+proj=longlat +datum=NAD83"))
#us.map.1<-spTransform(a.1,CRS("+init=esri:102003"))
us.counties.2<-spTransform(a.2,CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs"))

##set up map of Virginia to link kriged points with
#Source for 2008 Virginia congressional maps:
#ftp://ftp2.census.gov/geo/tiger/TIGER2008/51_VIRGINIA/
va.cd.0<-readShapePoly(fn="tl_2008_51_cd110/tl_2008_51_cd110.shp", proj4string=CRS("+proj=longlat +datum=NAD83"))
va.cd.1<-spTransform(va.cd.0,CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs"))
#convert to spatial polygons
va.cd.2<-as(va.cd.1,'SpatialPolygons')

##lookup info for counties and states
state.id<-rep(NA,3082)
for(i in 1:3082){state.id[i]<-strsplit(names(us.counties.2),split=",")[[i]][1]}
state.id<-recode(state.id,'"alabama"="AL";"arizona"="AZ";"arkansas"="AR";"california"="CA";"colorado"="CO";"connecticut"="CT";"delaware"="DE";"district of columbia"="DC";"florida"="FL";"georgia"="GA";"idaho"="ID";"illinois"="IL";"indiana"="IN";"iowa"="IA";"kansas"="KS";"kentucky"="KY";"louisiana"="LA";"maine"="ME";"maryland"="MD";"massachusetts"="MA";"michigan"="MI";"minnesota"="MN";"mississippi"="MS";"missouri"="MO";"montana"="MT";"nebraska"="NE";"nevada"="NV";"new hampshire"="NH";"new jersey"="NJ";"new mexico"="NM";"new york"="NY";"north carolina"="NC";"north dakota"="ND";"ohio"="OH";"oklahoma"="OK";"oregon"="OR";"pennsylvania"="PA";"rhode island"="RI";"south carolina"="SC";"south dakota"="SD";"tennessee"="TN";"texas"="TX";"utah"="UT";"vermont"="VT";"virginia"="VA";"washington"="WA";"west virginia"="WV";"wisconsin"="WI";"wyoming"="WY"')
county.id.0<-county.id<-rep(NA,3082)
for(i in 1:3082){county.id[i]<-strsplit(names(us.counties.2),split=",")[[i]][2]}
county.id[county.id=="dade" & state.id=="FL"]<-"miami-dade"
county.id<-sub(":main","",county.id)
county.id<-sub(":penrose","",county.id)
county.id<-sub(":lopez island","",county.id)
county.id<-sub(":orcas island","",county.id)
county.id<-sub(":san juan island","",county.id)
county.id<-sub(":chincoteague","",county.id)
county.id<-sub(":spit","",county.id)
county.id<-sub(":knotts","",county.id)
county.id<-sub(":north","",county.id)
county.id<-sub(":south","",county.id)
county.id[county.id=="obrien" & state.id=="IA"]<-"o'brien"
county.id[county.id=="prince georges" & state.id=="MD"]<-"prince george's"
county.id[county.id=="queen annes" & state.id=="MD"]<-"queen anne's"
county.id[county.id=="st marys" & state.id=="MD"]<-"st mary's"
county.id[county.id=="washington" & state.id=="DC"]<-"district of columbia"

###Virginia kriges first###
zip.va<-subset(zip,state=="VA")
N<-50000

#draw zip codes in proportion to population
zip.va$k.va.zip<-rmultinom(1,size=N,prob=zip.va$zipPop)
x.va<-rep(zip.va$longitude,zip.va$k.va.zip)
y.va<-rep(zip.va$latitude,zip.va$k.va.zip)
va.coords<-as.data.frame(cbind(x.va,y.va))
va.zip.list.0<-rep(zip.va$zip,zip.va$k.va.zip)
va.zip.list.0[va.zip.list.0=='228XX']<-22801
va.zip.list<-as.numeric(as.character(va.zip.list.0))

#reproject coordinates
coordinates(va.coords)<-c('x.va','y.va')
proj4string(va.coords)<-CRS("+proj=longlat +datum=NAD83")
va.coords.2<-spTransform(va.coords,CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs"))
colnames(va.coords.2@coords)<-c('eastings','northings')
va.coords.3<-as.data.frame(va.coords.2@coords)

radii<-sqrt(rep(zip.va$zipLandKM,zip.va$k.va.zip)/pi)#radii to jitter by
radii[radii==0]<-.1
for(i in 1:length(va.coords.3$eastings)){
	va.coords.3$eastings[i]<-jitter(va.coords.3$eastings[i],amount=radii[i])
	va.coords.3$northings[i]<-jitter(va.coords.3$northings[i],amount=radii[i])
	}
dim(va.coords.3)
plot(x=va.coords.3$eastings,y=va.coords.3$northings,pch='.')

	#find the right polygon
	joint<-SpatialPoints(va.coords.3,CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs"))
	pol.no<-joint%over%va.cd.2
	cd<-recode(pol.no,'1=6;2=9;3=10;4=1;5=11;6=5;7=7;8=8;9=4;10=3;11=2')
	sum(table(cd))
	county.no<-joint%over%us.counties.2
	linkup<-rep(NA,N)
	for(i in 1:N){
		linkup[i]<-ifelse(!is.na(state.id[county.no[i]]), which(ardaUSDA$County.Name==county.id[county.no[i]] & ardaUSDA$State==state.id[county.no[i]]), NA)
		}
	spat.info.0<-cbind(va.coords.3$eastings,va.coords.3$northings,cd,county.id[county.no],state.id[county.no],ardaUSDA$rural[linkup],51,va.zip.list)
	colnames(spat.info.0)[8]<-"zip.list"
	spat.info.1<-spat.info.0[!is.na(spat.info.0[,3]) & !is.na(spat.info.0[,4]),]
	spat.info<-spat.info.2<-spat.info.1[order(spat.info.1[,3]),]
	linkup.2<-linkup[!is.na(spat.info.0[,3]) & !is.na(spat.info.0[,4])]
	linkup.2<-linkup.2[order(spat.info.1[,3])]
	
	#record district codes
	no.dist<-table(spat.info.2[,3])
	no.dist[paste(c(1:11))]

#plug in covariate values
covariates<-matrix(NA,nrow=dim(spat.info)[1],ncol=8)
colnames(covariates)<-c('statefip','ownershp','age','race','educ','empstat','ftotinc','female')

##output for religion
religion.K<-matrix(NA,nrow=dim(spat.info)[1],ncol=8)
colnames(religion.K)<-c('other','cath','mormon','ortho','jewish','islam','main','evan')

#make the draws
for(i in 1:dim(spat.info)[1]){
	covariates[i,1]<-51 #VA FIPS code
	
	#draw religion
	religion.K[i,]<-rmultinom(1,1,prob=c(ardaUSDA$other[linkup.2[i]],ardaUSDA$cathrt[linkup.2[i]],ardaUSDA$ldsrt[linkup.2[i]],ardaUSDA$orthrt[linkup.2[i]],ardaUSDA$jewrt[linkup.2[i]],ardaUSDA$islamrt[linkup.2[i]],ardaUSDA$mainrt[linkup.2[i]],ardaUSDA$evanrt[linkup.2[i]]))

	#DRAW FROM CENSUS DATA#
	d.zip<-spat.info[i,8]
	#print(d.zip)
	#nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,]
	#nhgis.A[nhgis.A$ZCTAA==d.zip,]
	
	##Draw from the joint distribution of race, age, and sex##
	if(nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,325]==0 & which(nhgis.A.1$ZCTAA==d.zip)==dim(nhgis.A.1)[1]){three.draw<-which.max(rmultinom(1,1,prob=full.three))
		} else if(nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,325]==0 & nhgis.A.1[which(nhgis.A.1$ZCTAA==d.zip)+1,325]==0){three.draw<-which.max(rmultinom(1,1,prob=full.three))
			} else if(nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,325]==0){three.draw<-which.max(rmultinom(1,1,prob=nhgis.A.1[which(nhgis.A.1$ZCTAA==d.zip)+1,3:324]))
				} else{three.draw<-which.max(rmultinom(1,1,prob=nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,3:324]))}
	
	#re-run until drawing from VAP
	while(three.draw%in%c(1:4,24:27,47:50,70:73,93:96,116:119,139:142,162:165,185:188,208:211,231:234,254:257,277:280,300:303)){
		if(nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,325]==0 & which(nhgis.A.1$ZCTAA==d.zip)==dim(nhgis.A.1)[1]){three.draw<-which.max(rmultinom(1,1,prob=full.three))
			} else if(nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,325]==0 & nhgis.A.1[which(nhgis.A.1$ZCTAA==d.zip)+1,325]==0){three.draw<-which.max(rmultinom(1,1,prob=full.three))
				} else if(nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,325]==0){three.draw<-which.max(rmultinom(1,1,prob=nhgis.A.1[which(nhgis.A.1$ZCTAA==d.zip)+1,3:324]))
					} else{three.draw<-which.max(rmultinom(1,1,prob=nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,3:324]))}
	}
	
	#sex
	covariates[i,8]<-as.numeric(three.draw%in%c(24:46,70:92,116:138,162:184,208:230,254:276,300:322))
	#race
	if(three.draw%in%c(1:46)){covariates[i,4]<-1} else{ if(three.draw%in%c(47:92)){covariates[i,4]<-2} else{covariates[i,4]<-3}}
	#age; assume the maximum is 98, which is what we see in the CCES
	if(three.draw%in%c(5,28,51,74,97,120,143,166,189,212,235,258,281,304)){	age<-which.max(rmultinom(1,1,prob=c(138,122)))+17
		}else if(three.draw%in%c(6,29,52,75,98,121,144,167,190,213,236,259,282,305)){age<-20 
			}else if(three.draw%in%c(7,30,53,76,99,120,145,168,191,214,237,260,283,306)){age<-21 
				}else if(three.draw%in%c(8,31,54,77,100,121,146,169,192,215,238,261,284,307)){age<-which.max(rmultinom(1,1,prob=c(125,142,170)))+21
					}else if(three.draw%in%c(9,32,55,78,101,122,147,170,193,216,239,262,285,308)){age<-which.max(rmultinom(1,1,prob=c(198,221,240,263,225)))+24
						}else if(three.draw%in%c(10,33,56,79,102,123,148,171,194,217,240,263,286,309)){age<-which.max(rmultinom(1,1,prob=c(287,288,279,325,313)))+29
							}else if(three.draw%in%c(11,34,57,80,103,124,149,172,195,218,241,264,287,310)){age<-which.max(rmultinom(1,1,prob=c(282,269,321,371,317)))+34
								}else if(three.draw%in%c(12,35,58,81,104,125,150,173,196,219,242,265,288,311)){age<-which.max(rmultinom(1,1,prob=c(378,404,402,447,481)))+39
									}else if(three.draw%in%c(13,36,59,82,105,126,151,174,197,220,243,266,289,312)){age<-which.max(rmultinom(1,1,prob=c(525,532,581,552,580)))+44
										}else if(three.draw%in%c(14,37,60,83,106,127,152,175,198,221,244,267,290,313)){age<-which.max(rmultinom(1,1,prob=c(524,626,624,641,611)))+49
											}else if(three.draw%in%c(15,38,61,84,107,128,153,176,199,222,245,268,291,314)){age<-which.max(rmultinom(1,1,prob=c(503,472,459,462,442)))+54
												}else if(three.draw%in%c(16,39,62,85,108,129,154,177,200,223,246,269,292,315)){age<-which.max(rmultinom(1,1,prob=c(485,512)))+59
													}else if(three.draw%in%c(17,40,63,86,109,130,155,178,201,224,247,270,293,316)){age<-which.max(rmultinom(1,1,prob=c(449,375,340)))+61
														}else if(three.draw%in%c(18,41,64,87,110,131,156,179,202,225,248,271,294,317)){age<-which.max(rmultinom(1,1,prob=c(550,557)))+64
															}else if(three.draw%in%c(19,42,65,88,111,132,157,180,203,226,249,272,295,318)){age<-which.max(rmultinom(1,1,prob=c(476,440,338)))+66
																}else if(three.draw%in%c(20,43,66,89,112,133,158,181,204,227,250,273,296,319)){age<-which.max(rmultinom(1,1,prob=c(333,275,266,218,194)))+69
																	}else if(three.draw%in%c(21,44,67,90,113,134,159,182,205,228,251,274,297,320)){age<-which.max(rmultinom(1,1,prob=c(169,144,141,112,111)))+74
																		}else if(three.draw%in%c(22,45,68,91,114,135,160,183,206,229,252,275,298,321)){age<-which.max(rmultinom(1,1,prob=c(67,59,44,28,25)))+79
																			}else {age<-which.max(rmultinom(1,1,prob=c(15,17,12,5,5,4,2,2,1,1,1,1,1,1)))+84}
	covariates[i,3]<-age							
	#three.draw; covariates[1,c(8,4,3)]	#checker
	
	##Draw from homeownership##
	if(nhgis.B[nhgis.B$ZCTAA==d.zip,95]==0 & which(nhgis.B$ZCTAA==d.zip)==dim(nhgis.B)[1]){own.prob<-.6923026
		} else if(nhgis.B[nhgis.B$ZCTAA==d.zip,95]==0 & nhgis.B[which(nhgis.B$ZCTAA==d.zip)+1,95]==0){own.prob<-.6923026
			} else if(nhgis.B[nhgis.B$ZCTAA==d.zip,95]==0){own.prob<-nhgis.B[which(nhgis.B$ZCTAA==d.zip)+1,'F9P001']/(nhgis.B[which(nhgis.B$ZCTAA==d.zip)+1,'F9P001']+nhgis.B[which(nhgis.B$ZCTAA==d.zip)+1,'F9P002'])
				} else{own.prob<-nhgis.B[nhgis.B$ZCTAA==d.zip,'F9P001']/(nhgis.B[nhgis.B$ZCTAA==d.zip,'F9P001']+nhgis.B[nhgis.B$ZCTAA==d.zip,'F9P002'])}
	covariates[i,2]<-rbinom(1,1,own.prob)

	##Draw from employment status##
	#compute men and women who are out of labor force
	out.women<- sum(nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,c(24:46,70:92,116:138,162:184,208:230,254:276,300:322)+2])-nhgis.B[nhgis.B$ZCTAA==d.zip,'GLR003']-nhgis.B[nhgis.B$ZCTAA==d.zip,'GLR004']
	out.men<- sum(nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,c(1:23,47:69,93:115,139:161,185:207,231:253,277:299)+2])-nhgis.B[nhgis.B$ZCTAA==d.zip,'GLR001']-nhgis.B[nhgis.B$ZCTAA==d.zip,'GLR002']
	out.women<-ifelse(out.women<0,yes=0,no=out.women)
	out.men<-ifelse(out.men<0,yes=0,no=out.men)
	
	#draw conditional on sex
	if(covariates[i,8]==1){covariates[i,6]<-which.max(rmultinom(1,1,prob=c(nhgis.B[nhgis.B$ZCTAA==d.zip,'GLR003'],nhgis.B[nhgis.B$ZCTAA==d.zip,'GLR004'],out.women)))
		}else{covariates[i,6]<-which.max(rmultinom(1,1,prob=c(nhgis.B[nhgis.B$ZCTAA==d.zip,'GLR001'],nhgis.B[nhgis.B$ZCTAA==d.zip,'GLR002'],out.men)))}
	
	##draw family income##
	#for non-aligned Census v. CCES groups, CCES group is assigned based on population income distribution (contingent on Census group)
	if(nhgis.B[nhgis.B$ZCTAA==d.zip,96]==0 & which(nhgis.B$ZCTAA==d.zip)==dim(nhgis.B)[1]){draw.inc<-which.max(rmultinom(1,1,prob=full.inc))
		} else if(nhgis.B[nhgis.B$ZCTAA==d.zip,96]==0 & nhgis.B[which(nhgis.B$ZCTAA==d.zip)+1,96]==0){draw.inc<-which.max(rmultinom(1,1,prob=full.inc))
			}else if(nhgis.B[nhgis.B$ZCTAA==d.zip,96]==0){draw.inc<-which.max(rmultinom(1,1,prob=nhgis.B[which(nhgis.B$ZCTAA==d.zip)+1,77:92]))
				} else{draw.inc<-which.max(rmultinom(1,1,prob=nhgis.B[nhgis.B$ZCTAA==d.zip,77:92]))}
	if(draw.inc%in%c(1:5)){hold<-draw.inc
		}else if(draw.inc%in%c(6:7)){hold<-6
			}else if(draw.inc%in%c(8:9)){hold<-7
				}else if(draw.inc==10){hold<-8
					}else if(draw.inc==11){hold<-9+rbinom(1,1,0.3571429)
						} else if(draw.inc==12){hold<-10+rbinom(1,1,0.8235294)
							} else if(draw.inc==13){hold<-12+rbinom(1,1,0.1666667)
								} else if(draw.inc==14){hold<-13
									} else if(draw.inc%in%c(15,16)){hold<-14}
	covariates[i,7]<-hold

	##draw education conditional on sex##
	if(covariates[i,8]==1 & nhgis.B[nhgis.B$ZCTAA==d.zip,97]==0 & which(nhgis.B$ZCTAA==d.zip)==dim(nhgis.B)[1]){educ.draw<-which.max(rmultinom(1,1,prob=full.educ.women))
		} else if(covariates[i,8]==1 & nhgis.B[nhgis.B$ZCTAA==d.zip,97]==0 & nhgis.B[which(nhgis.B$ZCTAA==d.zip)+1,97]==0){educ.draw<-which.max(rmultinom(1,1,prob=full.educ.women))
			}else if(covariates[i,8]==1 & nhgis.B[nhgis.B$ZCTAA==d.zip,97]==0){educ.draw<-which.max(rmultinom(1,1,prob=nhgis.B[which(nhgis.B$ZCTAA==d.zip)+1,57:72]))
				} else if(covariates[i,8]==1){educ.draw<-which.max(rmultinom(1,1,prob=nhgis.B[nhgis.B$ZCTAA==d.zip,57:72]))
					} else if(covariates[i,8]==0 & nhgis.B[nhgis.B$ZCTAA==d.zip,98]==0 & which(nhgis.B$ZCTAA==d.zip)==dim(nhgis.B)[1]){educ.draw<-which.max(rmultinom(1,1,prob=full.educ.men))
						} else if(covariates[i,8]==0 & nhgis.B[nhgis.B$ZCTAA==d.zip,98]==0 & nhgis.B[which(nhgis.B$ZCTAA==d.zip)+1,98]==0){educ.draw<-which.max(rmultinom(1,1,prob=full.educ.men))
							}else if(covariates[i,8]==0 & nhgis.B[nhgis.B$ZCTAA==d.zip,98]==0){educ.draw<-which.max(rmultinom(1,1,prob=nhgis.B[which(nhgis.B$ZCTAA==d.zip)+1,41:56]))
								} else {educ.draw<-which.max(rmultinom(1,1,prob=nhgis.B[nhgis.B$ZCTAA==d.zip,41:56]))}
	covariates[i,5]<-recode(educ.draw,"1:8=0;9=1;10:11=2;12=3;13=4;14:16=5")
}

###now the states###
N<-200000
	
#draw zip codes in proportion to population
zip$k.st.zip<-rmultinom(1,size=N,prob=zip$zipPop)
x.st<-rep(zip$longitude,zip$k.st.zip)
y.st<-rep(zip$latitude,zip$k.st.zip)
st.coords<-as.data.frame(cbind(x.st,y.st))

###THE FOLLOWING LIST TURNS NON-NUMERIC ZIP CODES INTO NEIGHBORING NUMERIC ONES###
###YOU MAY HAVE TO CHANGE THIS LIST TO RETAIN WHATEVER YOU DRAW (though set.seed should help)###
zip.list.0<-rep(zip$zip,zip$k.st.zip)
zip.list.0[zip.list.0=='365XX']<-36501
zip.list.0[zip.list.0=='910XX']<-91001
zip.list.0[zip.list.0=='923XX']<-92301
zip.list.0[zip.list.0=='959XX']<-95901
zip.list.0[zip.list.0=='961XX']<-96101
zip.list.0[zip.list.0=='320XX']<-32008
zip.list.0[zip.list.0=='334XX']<-33401
zip.list.0[zip.list.0=='315XX']<-31501
zip.list.0[zip.list.0=='890XX']<-89001
zip.list.0[zip.list.0=='894XX']<-89403
zip.list.0[zip.list.0=='870XX']<-87001
zip.list.0[zip.list.0=='874XX']<-87401
zip.list.0[zip.list.0=='883XX']<-88301
zip.list.0[zip.list.0=='121XX']<-12106
zip.list.0[zip.list.0=='588XX']<-58801
zip.list.0[zip.list.0=='970XX']<-97001
zip.list.0[zip.list.0=='976XX']<-97601
zip.list.0[zip.list.0=='373XX']<-37301
zip.list.0[zip.list.0=='790XX']<-79001
zip.list.0[zip.list.0=='793XX']<-79311
zip.list.0[zip.list.0=='797XX']<-79701
zip.list.0[zip.list.0=='991XX']<-99101
zip.list.0[zip.list.0=='852XX']<-85201
zip.list.0[zip.list.0=='926XX']<-92602
zip.list.0[zip.list.0=='953XX']<-95301
zip.list.0[zip.list.0=='712XX']<-71201
zip.list.0[zip.list.0=='044XX']<-'04401'
zip.list.0[zip.list.0=='567XX']<-56701
zip.list.0[zip.list.0=='386XX']<-38601
zip.list.0[zip.list.0=='390XX']<-39038
zip.list.0[zip.list.0=='891XX']<-89101
zip.list.0[zip.list.0=='456XX']<-45601
zip.list.0[zip.list.0=='730XX']<-73002
zip.list.0[zip.list.0=='168XX']<-16801
zip.list.0[zip.list.0=='785XX']<-78501
zip.list.0[zip.list.0=='788XX']<-78801
zip.list.0[zip.list.0=='988XX']<-98801
zip.list.0[zip.list.0=='993XX']<-99301
zip.list.0[zip.list.0=='250XX']<-25002
zip.list.0[zip.list.0=='285XX']<-28501
zip.list.0[zip.list.0=='295XX']<-29501
zip.list.0[zip.list.0=='304XX']<-30401
zip.list.0[zip.list.0=='310XX']<-31001
zip.list.0[zip.list.0=='700XX']<-70001
zip.list.0[zip.list.0=='780XX']<-78001
zip.list.0[zip.list.0=='783XX']<-78330
zip.list.0[zip.list.0=='792XX']<-79201
zip.list.0[zip.list.0=='798XX']<-79821
zip.list.0[zip.list.0=='833XX']<-83301
zip.list.0[zip.list.0=='864XX']<-86401
zip.list.0[zip.list.0=='367XX']<-36701
zip.list.0[zip.list.0=='593XX']<-59301
zip.list.0[zip.list.0=='714XX']<-71401
zip.list.0[zip.list.0=='847XX']<-84701
zip.list.0[zip.list.0=='971XX']<-97101
zip.list.1<-as.character(zip.list.0)
zip.list.1<-gsub(pattern="XX",replacement="01",zip.list.1)
zip.list.1<-gsub(pattern="HH",replacement="01",zip.list.1)
zip.list<-as.numeric(as.character(zip.list.1))

#reproject coordinates
coordinates(st.coords)<-c('x.st','y.st')
proj4string(st.coords)<-CRS("+proj=longlat +datum=NAD83")
st.coords.2<-spTransform(st.coords,CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs"))
colnames(st.coords.2@coords)<-c('eastings','northings')
st.coords.3<-as.data.frame(st.coords.2@coords)

radii.st<-sqrt(rep(zip$zipLandKM,zip$k.st.zip)/pi)#radii to jitter by
radii.st[radii.st==0]<-.1
for(i in 1:length(st.coords.3$eastings)){
	st.coords.3$eastings[i]<-jitter(st.coords.3$eastings[i],amount=radii.st[i])
	st.coords.3$northings[i]<-jitter(st.coords.3$northings[i],amount=radii.st[i])
	}
dim(st.coords.3)
plot(x=st.coords.3$eastings,y=st.coords.3$northings,pch='.')

	#find the right polygon
	joint.2<-SpatialPoints(st.coords.3,CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs"))
	county.no.2<-joint.2%over%us.counties.2
	linkup.B<-rep(NA,N)
	for(i in 1:N){
		linkup.B[i]<-ifelse(!is.na(state.id[county.no.2[i]]), which(ardaUSDA$County.Name==county.id[county.no.2[i]] & ardaUSDA$State==state.id[county.no.2[i]]), NA)
		}
	spat.info.B.0<-cbind(st.coords.3$eastings,st.coords.3$northings,NA,county.id[county.no.2],state.id[county.no.2],ardaUSDA$rural[linkup.B],trunc(as.numeric(ardaUSDA$fip[linkup.B])/1000),zip.list)
	spat.info.B.1<-spat.info.B.0[!is.na(spat.info.B.0[,4])&!is.na(spat.info.B.0[,7]),]
	spat.info.B.2<-spat.info.B.1[order(as.numeric(spat.info.B.1[,7])),]
	linkup.B.2<-linkup.B[!is.na(spat.info.B.0[,4])&!is.na(spat.info.B.0[,7])]
	linkup.B.2<-linkup.B.2[order(as.numeric(spat.info.B.1[,7]))]
	spat.info.B<-spat.info.B.2[!is.na(spat.info.B.2[,7]),]
	length(linkup.B.2); dim(spat.info.B)
	
	#state summary
	no.state<-table(spat.info.B[,7])
	no.state[paste(c(1,4:6,8:13,16:42,44:51,53:56))]

#fix nonexistent ZIP codes
spat.info.B$zip.list[spat.info.B$zip.list==70701]<-70706
spat.info.B$zip.list[spat.info.B$zip.list==98301]<-98303

#load ZIP code data
zip.0<-read.fwf('zcta5.txt', widths=c(2,5,59,9,9,14,14,12,12,10,10),col.names=c('state','zip','zcta','population','housingUnits','landMeters','waterMeters','landMiles','waterMiles','latitude','longitude'))
zip.0$zipLandKM<-zip.0$landMeters/(1000^2)
zip.0$zipPop<-zip.0$population
zip.1<-subset(zip.0, subset=state!='AK'&state!='HI'&state!='PR', select=c(state,zip,latitude,longitude,zipPop,zipLandKM))

#add state fips codes
state.fips<-c(2,1,5,60,4,6,8,9,11,10,12,13,66,15,19,16,17,18,20,21,22,25,24,23,26,27,29,28,30,37,38,31,33,34,35,32,36,39,40,41,42,72,44,45,46,47,48,49,51,78,50,53,55,54,56)
abbr<-c('AK','AL','AR','AS','AZ','CA','CO','CT','DC','DE','FL','GA','GU','HI','IA','ID','IL','IN','KS','KY','LA','MA','MD','ME','MI','MN','MO','MS','MT','NC','ND','NE','NH','NJ','NM','NV','NY','OH','OK','OR','PA','PR','RI','SC','SD','TN','TX','UT','VA','VI','VT','WA','WI','WV','WY')
fips.match<-data.frame(state.fips,abbr)
zip<-merge(x=zip.1,y=fips.match,by.x='state',by.y='abbr',all.x=T,all.y=F)

### LOAD 2000 CENSUS DATA: NHGIS ZIP code data ###
nhgis.A<-read.csv('nhgis0005_ds146_2000_zcta.csv')
nhgis.A.1<-nhgis.A[,-c(1:37,39)]
no.three<-apply(nhgis.A.1[,3:324],1,nnzero)
nhgis.A.1<-cbind(nhgis.A.1,no.three)
nhgis.B<-read.csv('nhgis0005_ds151_2000_zcta.csv')
no.home<-nhgis.B$F9P001+nhgis.B$F9P002
nhgis.B<-cbind(nhgis.B,no.home)
nzero<-apply(nhgis.B[,77:92],1,nnzero)
nhgis.B<-cbind(nhgis.B,nzero)
no.women.educ<-apply(nhgis.B[,57:72],1,nnzero)
nhgis.B<-cbind(nhgis.B,no.women.educ)
no.men.educ<-apply(nhgis.B[,41:56],1,nnzero)
nhgis.B<-cbind(nhgis.B,no.men.educ)
full.three<-apply(nhgis.A.1[,3:324],2,sum)
full.inc<-apply(nhgis.B[,77:92],2,sum)
full.educ.women<-apply(nhgis.B[,57:72],2,sum)
full.educ.men<-apply(nhgis.B[,41:56],2,sum)
#table(zip$zip%in%nhgis.A.1$ZCTAA)

#reformat because zip.list loses leading zeroes
HH<-grep(pattern="HH",x=nhgis.A.1$ZCTAA)
XX<-grep(pattern="XX",x=nhgis.A.1$ZCTAA)
nhgis.A.1<-nhgis.A.1[-c(HH,XX),]
nhgis.A.1$ZCTAA<-as.numeric(as.character(nhgis.A.1$ZCTAA))

HH<-grep(pattern="HH",x=nhgis.B$ZCTAA)
XX<-grep(pattern="XX",x=nhgis.B$ZCTAA)
nhgis.B<-nhgis.B[-c(HH,XX),]
nhgis.B$ZCTAA<-as.numeric(as.character(nhgis.B$ZCTAA))

###LOAD COUNTY-BASED DATA###
#USDA urban-rural continuum codes by county
#http://www.ers.usda.gov/data-products/rural-urban-continuum-codes.aspx#.UfCK11OE4xc
usda.0<-read.csv('ruralurbancodes2003b.csv')
#subtract 1 to have a 0 origin
usda.0$rural<-usda.0$X2003.Rural.urban.Continuum.Code-1
#create population variable for rejection sampler
usda.0$X2000.Population[is.na(usda.0$X2000.Population)]<-0
state.maxes<-as.matrix(by(usda.0$X2000.Population,usda.0$State,max))
usda.0$pop2000<-usda.0$X2000.Population/state.maxes[usda.0$State]

#subsetting
usda.1<-subset(usda.0, select=c(FIPS.Code,State,County.Name,rural,pop2000))
usda.1$County.Name<-tolower(usda.1$County.Name)
usda.1$County.Name<-sub(" county", "", usda.1$County.Name)
usda.1$County.Name<-sub(" parish", "", usda.1$County.Name)
usda.1$County.Name<-sub("\\.", "", usda.1$County.Name)
usda.1$County.Name[usda.1$County.Name=="dekalb"]<-"de kalb" 
usda.1$County.Name[usda.1$County.Name=="lamoure"]<-"la moure"
usda.1$County.Name[usda.1$County.Name=="laporte"]<-"la porte"
usda.1$County.Name[usda.1$County.Name=="dupage"]<-"du page"
usda.1$County.Name[usda.1$County.Name=="desoto"]<-"de soto"
usda.1$County.Name[usda.1$County.Name=="dewitt"]<-"de witt"
usda.1$County.Name[!usda.1$County.Name%in%c("baltimore city","st louis city","carson city","charles city","james city")]<-sub(" city","",usda.1$County.Name[!usda.1$County.Name%in%c("baltimore city","st louis city","carson city","charles city","james city")])
usda<-subset(usda.1,State!="AK" & State !="HI")

#ARDA religious census
#http://www.thearda.com/Archive/Files/Descriptions/RCMSCY.asp
#Mainline: http://www.thearda.com/rcms2010/mainline.asp
#Evangelical: http://www.thearda.com/rcms2010/evangelical.asp
arda.0<-read.dta('arda2000.DTA')
arda.0$county<-tolower(arda.0$county)
arda.1<-subset(arda.0,select=c(mainrt,evanrt,cathrt,orthrt,jewrt,islamrt,ldsrt,totrt,fip))
arda.1$other<-1000-arda.1$mainrt-arda.1$evanrt-arda.1$cathrt-arda.1$orthrt-arda.1$jewrt-arda.1$islamrt-arda.1$ldsrt
arda.1$other[arda.1$other<0]<-0
ardaUSDA<-merge(x=arda.1,y=usda,by.x='fip',by.y='FIPS.Code')
ardaUSDA[3074,]<-c(48301,0,73,0,0,0,0,0,73,0,'TX','loving',8,.00000001)

###U.S. DRAWS###
###draw covariates for each kriged point###
#census<-read.dta('census2000.dta') # state-level census data (not used)
#obs<-table(census$statefip); obs
k.draws<-dim(spat.info.B)[1]
covariates.B<-matrix(NA,nrow=k.draws,ncol=8)
colnames(covariates.B)<-c('statefip','ownershp','age','race','educ','empstat','ftotinc','female')
religion.K.B<-matrix(NA,nrow=dim(spat.info.B)[1],ncol=8)
colnames(religion.K.B)<-c('other','cath','mormon','ortho','jewish','islam','main','evan')

for(i in 1:k.draws){
	d.zip<-spat.info.B[i,8]
	zip.ind<-which(zip$zip==d.zip)[1]
	#cat('iteration=',i,'/ zip=', d.zip, '/ index=', zip.ind, '\n')
	#nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,]
	#nhgis.A[nhgis.A$ZCTAA==d.zip,]
	
	#draw religion
	religion.K.B[i,]<-rmultinom(1,1,prob=c(ardaUSDA$other[linkup.B.2[i]],ardaUSDA$cathrt[linkup.B.2[i]],ardaUSDA$ldsrt[linkup.B.2[i]],ardaUSDA$orthrt[linkup.B.2[i]],ardaUSDA$jewrt[linkup.B.2[i]],ardaUSDA$islamrt[linkup.B.2[i]],ardaUSDA$mainrt[linkup.B.2[i]],ardaUSDA$evanrt[linkup.B.2[i]]))

	##Record state FIPS code##
	covariates.B[i,1]<-zip[zip.ind,7]
	
	##CENSUS DRAWS##
	##Draw from the joint distribution of race, age, and sex##
	if(nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,325]==0 & which(nhgis.A.1$ZCTAA==d.zip)==dim(nhgis.A.1)[1]){three.draw<-which.max(rmultinom(1,1,prob=full.three))
		} else if(nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,325]==0 & nhgis.A.1[which(nhgis.A.1$ZCTAA==d.zip)+1,325]==0){three.draw<-which.max(rmultinom(1,1,prob=full.three))
			} else if(nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,325]==0){three.draw<-which.max(rmultinom(1,1,prob=nhgis.A.1[which(nhgis.A.1$ZCTAA==d.zip)+1,3:324]))
				} else{three.draw<-which.max(rmultinom(1,1,prob=nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,3:324]))}
	
	#re-run until drawing from VAP
	while(three.draw%in%c(1:4,24:27,47:50,70:73,93:96,116:119,139:142,162:165,185:188,208:211,231:234,254:257,277:280,300:303)){
		if(nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,325]==0 & which(nhgis.A.1$ZCTAA==d.zip)==dim(nhgis.A.1)[1]){three.draw<-which.max(rmultinom(1,1,prob=full.three))
			} else if(nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,325]==0 & nhgis.A.1[which(nhgis.A.1$ZCTAA==d.zip)+1,325]==0){three.draw<-which.max(rmultinom(1,1,prob=full.three))
				} else if(nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,325]==0){three.draw<-which.max(rmultinom(1,1,prob=nhgis.A.1[which(nhgis.A.1$ZCTAA==d.zip)+1,3:324]))
					} else{three.draw<-which.max(rmultinom(1,1,prob=nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,3:324]))}
	}
	
	#sex
	covariates.B[i,8]<-as.numeric(three.draw%in%c(24:46,70:92,116:138,162:184,208:230,254:276,300:322))
	#race
	if(three.draw%in%c(1:46)){covariates.B[i,4]<-1} else{ if(three.draw%in%c(47:92)){covariates.B[i,4]<-2} else{covariates.B[i,4]<-3}}
	#age; assume the maximum is 98, which is what we see in the CCES
	if(three.draw%in%c(5,28,51,74,97,120,143,166,189,212,235,258,281,304)){	age<-which.max(rmultinom(1,1,prob=c(138,122)))+17
		}else if(three.draw%in%c(6,29,52,75,98,121,144,167,190,213,236,259,282,305)){age<-20 
			}else if(three.draw%in%c(7,30,53,76,99,120,145,168,191,214,237,260,283,306)){age<-21 
				}else if(three.draw%in%c(8,31,54,77,100,121,146,169,192,215,238,261,284,307)){age<-which.max(rmultinom(1,1,prob=c(125,142,170)))+21
					}else if(three.draw%in%c(9,32,55,78,101,122,147,170,193,216,239,262,285,308)){age<-which.max(rmultinom(1,1,prob=c(198,221,240,263,225)))+24
						}else if(three.draw%in%c(10,33,56,79,102,123,148,171,194,217,240,263,286,309)){age<-which.max(rmultinom(1,1,prob=c(287,288,279,325,313)))+29
							}else if(three.draw%in%c(11,34,57,80,103,124,149,172,195,218,241,264,287,310)){age<-which.max(rmultinom(1,1,prob=c(282,269,321,371,317)))+34
								}else if(three.draw%in%c(12,35,58,81,104,125,150,173,196,219,242,265,288,311)){age<-which.max(rmultinom(1,1,prob=c(378,404,402,447,481)))+39
									}else if(three.draw%in%c(13,36,59,82,105,126,151,174,197,220,243,266,289,312)){age<-which.max(rmultinom(1,1,prob=c(525,532,581,552,580)))+44
										}else if(three.draw%in%c(14,37,60,83,106,127,152,175,198,221,244,267,290,313)){age<-which.max(rmultinom(1,1,prob=c(524,626,624,641,611)))+49
											}else if(three.draw%in%c(15,38,61,84,107,128,153,176,199,222,245,268,291,314)){age<-which.max(rmultinom(1,1,prob=c(503,472,459,462,442)))+54
												}else if(three.draw%in%c(16,39,62,85,108,129,154,177,200,223,246,269,292,315)){age<-which.max(rmultinom(1,1,prob=c(485,512)))+59
													}else if(three.draw%in%c(17,40,63,86,109,130,155,178,201,224,247,270,293,316)){age<-which.max(rmultinom(1,1,prob=c(449,375,340)))+61
														}else if(three.draw%in%c(18,41,64,87,110,131,156,179,202,225,248,271,294,317)){age<-which.max(rmultinom(1,1,prob=c(550,557)))+64
															}else if(three.draw%in%c(19,42,65,88,111,132,157,180,203,226,249,272,295,318)){age<-which.max(rmultinom(1,1,prob=c(476,440,338)))+66
																}else if(three.draw%in%c(20,43,66,89,112,133,158,181,204,227,250,273,296,319)){age<-which.max(rmultinom(1,1,prob=c(333,275,266,218,194)))+69
																	}else if(three.draw%in%c(21,44,67,90,113,134,159,182,205,228,251,274,297,320)){age<-which.max(rmultinom(1,1,prob=c(169,144,141,112,111)))+74
																		}else if(three.draw%in%c(22,45,68,91,114,135,160,183,206,229,252,275,298,321)){age<-which.max(rmultinom(1,1,prob=c(67,59,44,28,25)))+79
																			}else {age<-which.max(rmultinom(1,1,prob=c(15,17,12,5,5,4,2,2,1,1,1,1,1,1)))+84}
	covariates.B[i,3]<-age								
	##Draw from homeownership##
	if(nhgis.B[nhgis.B$ZCTAA==d.zip,95]==0 & which(nhgis.B$ZCTAA==d.zip)==dim(nhgis.B)[1]){own.prob<-.6923026
		} else if(nhgis.B[nhgis.B$ZCTAA==d.zip,95]==0 & nhgis.B[which(nhgis.B$ZCTAA==d.zip)+1,95]==0){own.prob<-.6923026
			} else if(nhgis.B[nhgis.B$ZCTAA==d.zip,95]==0){own.prob<-nhgis.B[which(nhgis.B$ZCTAA==d.zip)+1,'F9P001']/(nhgis.B[which(nhgis.B$ZCTAA==d.zip)+1,'F9P001']+nhgis.B[which(nhgis.B$ZCTAA==d.zip)+1,'F9P002'])
				} else{own.prob<-nhgis.B[nhgis.B$ZCTAA==d.zip,'F9P001']/(nhgis.B[nhgis.B$ZCTAA==d.zip,'F9P001']+nhgis.B[nhgis.B$ZCTAA==d.zip,'F9P002'])}
	covariates.B[i,2]<-rbinom(1,1,own.prob)

	##Draw from employment status##
	#compute men and women who are out of labor force
	out.women<- sum(nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,c(24:46,70:92,116:138,162:184,208:230,254:276,300:322)+2])-nhgis.B[nhgis.B$ZCTAA==d.zip,'GLR003']-nhgis.B[nhgis.B$ZCTAA==d.zip,'GLR004']
	out.men<- sum(nhgis.A.1[nhgis.A.1$ZCTAA==d.zip,c(1:23,47:69,93:115,139:161,185:207,231:253,277:299)+2])-nhgis.B[nhgis.B$ZCTAA==d.zip,'GLR001']-nhgis.B[nhgis.B$ZCTAA==d.zip,'GLR002']
	out.women<-ifelse(out.women<0,yes=0,no=out.women)
	out.men<-ifelse(out.men<0,yes=0,no=out.men)
	
	#draw conditional on sex
	if(covariates.B[i,8]==1){covariates.B[i,6]<-which.max(rmultinom(1,1,prob=c(nhgis.B[nhgis.B$ZCTAA==d.zip,'GLR003'],nhgis.B[nhgis.B$ZCTAA==d.zip,'GLR004'],out.women)))
		}else{covariates.B[i,6]<-which.max(rmultinom(1,1,prob=c(nhgis.B[nhgis.B$ZCTAA==d.zip,'GLR001'],nhgis.B[nhgis.B$ZCTAA==d.zip,'GLR002'],out.men)))}
	
	##draw family income##
	#for non-aligned Census v. CCES groups, CCES group is assigned based on population income distribution (contingent on Census group)
	if(nhgis.B[nhgis.B$ZCTAA==d.zip,96]==0 & which(nhgis.B$ZCTAA==d.zip)==dim(nhgis.B)[1]){draw.inc<-which.max(rmultinom(1,1,prob=full.inc))
		} else if(nhgis.B[nhgis.B$ZCTAA==d.zip,96]==0 & nhgis.B[which(nhgis.B$ZCTAA==d.zip)+1,96]==0){draw.inc<-which.max(rmultinom(1,1,prob=full.inc))
			}else if(nhgis.B[nhgis.B$ZCTAA==d.zip,96]==0){draw.inc<-which.max(rmultinom(1,1,prob=nhgis.B[which(nhgis.B$ZCTAA==d.zip)+1,77:92]))
				} else{draw.inc<-which.max(rmultinom(1,1,prob=nhgis.B[nhgis.B$ZCTAA==d.zip,77:92]))}
	if(draw.inc%in%c(1:5)){hold<-draw.inc
		}else if(draw.inc%in%c(6:7)){hold<-6
			}else if(draw.inc%in%c(8:9)){hold<-7
				}else if(draw.inc==10){hold<-8
					}else if(draw.inc==11){hold<-9+rbinom(1,1,0.3571429)
						} else if(draw.inc==12){hold<-10+rbinom(1,1,0.8235294)
							} else if(draw.inc==13){hold<-12+rbinom(1,1,0.1666667)
								} else if(draw.inc==14){hold<-13
									} else if(draw.inc%in%c(15,16)){hold<-14}
	covariates.B[i,7]<-hold

	##draw education conditional on sex##
	if(covariates.B[i,8]==1 & nhgis.B[nhgis.B$ZCTAA==d.zip,97]==0 & which(nhgis.B$ZCTAA==d.zip)==dim(nhgis.B)[1]){educ.draw<-which.max(rmultinom(1,1,prob=full.educ.women))
		} else if(covariates.B[i,8]==1 & nhgis.B[nhgis.B$ZCTAA==d.zip,97]==0 & nhgis.B[which(nhgis.B$ZCTAA==d.zip)+1,97]==0){educ.draw<-which.max(rmultinom(1,1,prob=full.educ.women))
			}else if(covariates.B[i,8]==1 & nhgis.B[nhgis.B$ZCTAA==d.zip,97]==0){educ.draw<-which.max(rmultinom(1,1,prob=nhgis.B[which(nhgis.B$ZCTAA==d.zip)+1,57:72]))
				} else if(covariates.B[i,8]==1){educ.draw<-which.max(rmultinom(1,1,prob=nhgis.B[nhgis.B$ZCTAA==d.zip,57:72]))
					} else if(covariates.B[i,8]==0 & nhgis.B[nhgis.B$ZCTAA==d.zip,98]==0 & which(nhgis.B$ZCTAA==d.zip)==dim(nhgis.B)[1]){educ.draw<-which.max(rmultinom(1,1,prob=full.educ.men))
						} else if(covariates.B[i,8]==0 & nhgis.B[nhgis.B$ZCTAA==d.zip,98]==0 & nhgis.B[which(nhgis.B$ZCTAA==d.zip)+1,98]==0){educ.draw<-which.max(rmultinom(1,1,prob=full.educ.men))
							}else if(covariates.B[i,8]==0 & nhgis.B[nhgis.B$ZCTAA==d.zip,98]==0){educ.draw<-which.max(rmultinom(1,1,prob=nhgis.B[which(nhgis.B$ZCTAA==d.zip)+1,41:56]))
								} else {educ.draw<-which.max(rmultinom(1,1,prob=nhgis.B[nhgis.B$ZCTAA==d.zip,41:56]))}
	covariates.B[i,5]<-recode(educ.draw,"1:8=0;9=1;10:11=2;12=3;13=4;14:16=5")
}

#check that number of structural observations and number of spatial points is the same
spat.info.B<-spat.info.B[!is.na(spat.info.B[,4]),]
dim(covariates.B); dim(spat.info.B)

##cleanup of the sampled covariates and kriged points
religion.all<-rbind(religion.K,religion.K.B)
dim(religion.all)
covariates.all<-rbind(covariates,covariates.B)
dim(covariates.all)
colnames(spat.info)[1:7]<-c('east.K','north.K','cd','county','state','rural','fips')
colnames(spat.info.B)[1:7]<-c('east.K','north.K','cd','county','state','rural','fips')
spat.info.all<-rbind(spat.info,spat.info.B)
dim(spat.info.all[,1:7]); length(spat.info.all[,8])
final.covariates<-cbind(covariates.all,spat.info.all[,1:7],religion.all,spat.info.all[,8])
colnames(final.covariates)[24]<-'zip'
dim(final.covariates)
final.covariates$statefip[final.covariates$state=="CT"]<-9
final.covariates$statefip[final.covariates$state=="DE"]<-10
final.covariates$statefip[final.covariates$state=="MA"]<-25
final.covariates$statefip[final.covariates$state=="ME"]<-23
final.covariates$statefip[final.covariates$state=="NH"]<-33
final.covariates$statefip[final.covariates$state=="NJ"]<-34
final.covariates$statefip[final.covariates$state=="NY"]<-36
final.covariates$statefip[final.covariates$state=="PA"]<-42
final.covariates$statefip[final.covariates$state=="RI"]<-44
final.covariates$statefip[final.covariates$state=="VT"]<-50
write.csv(final.covariates,"krigedPoints.csv",row.names=F)
